home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Atari Mega Archive 2
/
Atari Mega Archive CD - Volume 2.iso
/
minix
/
up1510b.tgz
/
up1510b
/
src
/
commands
/
gather.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-07-19
|
8KB
|
340 lines
/* gather - collect files for mailing Author: Andy Tanenbaum */
/* It sometimes happens that one needs to mail a large directory full of
* files to someone. This program can be used to collect these files into
* shar archives, compress and uuencode them. The interesting property that
* it has is that it makes sure that none of the archives are too big, and
* that no files are split over two archives.
*
* Syntax: gather [-s source_dir] [-d dest_dir] [-b max_arch_size] [-f file]
*
* -s source directory (where are the files to be sent)
* -d destination dir (where should the archives be put)
* -b bytes (maximum size of the archives; default 60K)
* -f file (use file_00.uue etc as archive names)
*
* Examples:
* gather # make 60K archives in this dir
* gather -d mailings -b 50000 # make 50K archives in mailings
*
* Note:
* The maximum size given by -b (default 60000 bytes) is only an
* approximation, since it is hard to tell how big the final file
* will be after shar'ing, compressing, and uue'ing. A heuristic
* is used.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
/* Tunable limits.  The buffers below are all statically sized from these. */
#define DEFAULT 60000 /* default archive size (initial value of max_bytes in main) */
#define MAX_DIR_ENT 512 /* how many directory entries allowed (size of dir_ent[]) */
#define HEAP_SIZE 20000 /* storage size for all file names */
#define NAME_SIZE 4096 /* storage size for current command (sizes names[] and work[]) */
#define BASE_SIZE 7 /* max number of chars in basename */
#define PATH_MAX 512 /* largest path name (size of target[]) */
/* heuristic() returns NUMERATOR * m / DENOMINATOR, i.e. 1.38 times the
 * requested archive size, as the limit on the total size of the input files.
 */
#define NUMERATOR 138L /* heuristic parameter */
#define DENOMINATOR 100L /* heuristic parameter */
char heap[HEAP_SIZE + 2]; /* file names stored here back to back, each NUL-terminated */
char names[NAME_SIZE]; /* blank-separated list of the files for the current archive */
char work[NAME_SIZE]; /* scratch buffer: stat() paths, shell commands, getcwd() result */
char base_name[BASE_SIZE + 1]; /* base name to use for the archives */
char target[PATH_MAX]; /* storage for target file names */
/* One entry per file found in the source directory (subdirectories are
 * skipped by main).  file_name points into heap[]; file_size is the st_size
 * value reported by stat().
 */
struct dir_ent {
char *file_name;
long file_size;
} dir_ent[MAX_DIR_ENT];
/* Old-style declarations for functions that do not return int. */
long atol(), heuristic();
char *getcwd();
main(argc, argv)
int argc;
char *argv[];
{
/* Parse the command and get ready. */
int i, counter, l, s, nonlocal;
char *p, num[3];
struct dirent *d;
DIR *dirp;
struct stat stbuf;
int first; /* first entry not used yet */
int limit; /* number of files in src_dir */
char *src_dir = "."; /* pointer to source directory */
char *dst_dir = "."; /* pointer to destination directory */
char *file = ""; /* name to use */
long max_bytes = DEFAULT; /* max archive size (approx.) */
long cutoff; /* max cumulative input size */
if (argc > 9) usage();
i = 1;
while (i < argc) {
/* Examine the i-th argument. */
p = argv[i];
if (*p != '-') usage();
switch (*(p + 1)) {
case 's': src_dir = argv[i + 1]; break;
case 'd': dst_dir = argv[i + 1]; break;
case 'f': file = argv[i + 1]; break;
case 'b':
max_bytes = atol(argv[i + 1]);
if (max_bytes <= 0) {
fprintf(stderr, "gather: bad -b value\n");
exit(1);
}
break;
default:
fprintf(stderr, "gather: unknown flag %s\n", p);
exit(1);
}
i += 2;
}
/* Determine the basename. */
get_basename(src_dir, file);
/* Open the source directory. */
i = 0;
p = heap;
if ((dirp = opendir(src_dir)) == (DIR *) NULL) {
fprintf(stderr, "gather: cannot open %s\n", src_dir);
exit(2);
}
/* Read in all the file names. */
while (1) {
d = readdir(dirp);
if (d == (struct dirent *) NULL) break;
l = strlen(d->d_name);
if (p + l >= &heap[HEAP_SIZE] || i >= MAX_DIR_ENT) {
fprintf(stderr, "gather: %s is too large\n", src_dir);
exit(2);
}
strcpy(work, src_dir);
strcat(work, "/");
strcat(work, d->d_name);
stat(work, &stbuf);
if ((stbuf.st_mode & S_IFMT) == S_IFDIR) continue;
dir_ent[i].file_name = p;
strcpy(p, d->d_name);
dir_ent[i].file_size = stbuf.st_size;
p += l + 1;
i++;
}
limit = i;
closedir(dirp);
/* Sort the names. */
sort_dir(limit);
/* Figure out when to stop reading files. */
cutoff = heuristic(max_bytes);
/* Collect files into archives. */
first = 0;
counter = 0;
while (first < limit) {
first = collect(first, limit, cutoff);
num[0] = '0' + (counter / 10);
num[1] = '0' + (counter % 10);
num[2] = 0;
/* Construct full path of compressed target. */
target[0] = 0;
if (strcmp(dst_dir, ".") != 0) {
strcpy(target, dst_dir);
strcat(target, "/");
}
strcat(target, base_name);
strcat(target, "_");
strcat(target, num);
strcat(target, ".Z");
/* (cd src; shar file ... | compress -fc) >dir/base.00.Z */
nonlocal = strcmp(src_dir, ".");
work[0] = 0;
if (nonlocal) {
strcat(work, "(cd ");
strcat(work, src_dir);
strcat(work, "; ");
}
strcat(work, "shar ");
strcat(work, names);
strcat(work, " | compress -fc ");
if (nonlocal) strcat(work, ")");
strcat(work, " >");
strcat(work, target);
s = system(work);
if (s < 0) {
fprintf(stderr, "gather: shar command failed\n");
exit(2);
}
/* Uue dir/base.00.Z */
strcpy(work, "uue ");
strcat(work, target);
strcat(work, "\n");
s = system(work);
if (s < 0) {
fprintf(stderr, "gather: uue command failed\n");
exit(2);
}
/* Unlink dir/base.00.Z */
unlink(target);
counter++;
}
}
int collect(first, limit, cutoff)
int first;
int limit;
long cutoff;
{
/* See how many files will fit in an archive. */
int nr_files;
long cum_size, size;
struct dir_ent *p, *endp;
names[0] = 0;
p = &dir_ent[first];
endp = &dir_ent[limit];
nr_files = 0;
cum_size = 0;
while (p < endp) {
size = p->file_size;
if (size > cutoff) {
fprintf(stderr, "gather: %s is too big\n", p->file_name);
exit(2);
}
/* First peek to see if next file fits. If not, maybe some
* other file can be used instead. Swap them. */
if (cum_size + size > cutoff) fudge(p, endp, cutoff - cum_size);
/* If it fails now, there is no file that will fit. */
size = p->file_size;
if (cum_size + size > cutoff) return(p - dir_ent);
strcat(names, p->file_name);
strcat(names, " ");
cum_size += size;
p++;
}
return(p - dir_ent);
}
long heuristic(m)
long m;
{
/* Files are collected up to some limit and then shar'ed, compressed and
 * uue'ed, so the size of the finished archive is not known while the files
 * are being picked.  This procedure holds the guess that ties the two
 * together: given the desired final size m, it returns the cutoff for the
 * total size of the input files, which is m scaled by NUMERATOR over
 * DENOMINATOR.
 */
  long scaled;

  scaled = NUMERATOR * m;
  return(scaled / DENOMINATOR);
}
sort_dir(limit)
int limit;                      /* how many entries in dir_ent */
{
/* Sort the first 'limit' entries of dir_ent[] into ascending strcmp() order
 * of file name, using a simple exchange sort: slot i is compared with every
 * later slot and swapped whenever the later name is smaller.
 *
 * The loops use indices rather than pointers so that an empty table
 * (limit == 0) never forms the address &dir_ent[-1].
 */
  int i, j;

  for (i = 0; i + 1 < limit; i++) {
    for (j = i + 1; j < limit; j++) {
      if (strcmp(dir_ent[i].file_name, dir_ent[j].file_name) > 0)
        swap(&dir_ent[i], &dir_ent[j]);
    }
  }
}
swap(p, q)
struct dir_ent *p, *q;
{
/* Exchange two entries. */
char *cp;
long l;
cp = p->file_name;
l = p->file_size;
p->file_name = q->file_name;
p->file_size = q->file_size;
q->file_name = cp;
q->file_size = l;
}
fudge(p, endp, size)
struct dir_ent *p, *endp;
long size;
{
/* The entry at p does not fit in the space that is left.  Scan the entries
 * after it, up to but not including endp, for the first one whose size is
 * <= size, and exchange it with p.  If there is none, nothing is changed.
 * This fudging gives a more uniform distribution, and reduces the number of
 * files needed.
 */
  register struct dir_ent *cand;

  cand = p + 1;
  while (cand < endp && cand->file_size > size) cand++;
  if (cand < endp) swap(p, cand);
}
get_basename(s, file)
char *s;
char *file;
{
/* Determine the basename and copy it to base_name.
 *
 * The name comes from 'file' if that is not empty, otherwise from the
 * source directory 's', with "." replaced by the name of the working
 * directory.  The basename is the last path component, cut off after
 * BASE_SIZE characters.  One trailing newline is removed from the string,
 * and trailing slashes are ignored, so that "dir/" gives "dir" rather than
 * an empty name.  A path made only of slashes, or an empty one, still gives
 * an empty base name.
 */
  int n;
  char *p, *start, *end;

  if (*file != 0) p = file;
  else if (strcmp(s, ".") == 0) {
    if (getcwd(work, NAME_SIZE) == (char *) NULL) {
      fprintf(stderr, "gather: could not get name of working dir\n");
      exit(2);
    }
    p = work;
  } else {
    p = s;
  }

  /* 'end' points just past the last character of interest.  Every step
   * checks end > p first, so an empty string is never read before its start.
   */
  end = p + strlen(p);
  if (end > p && *(end - 1) == '\n') *--end = 0;
  while (end > p && *(end - 1) == '/') end--;

  /* Back up to the character after the previous slash, if there is one. */
  start = end;
  while (start > p && *(start - 1) != '/') start--;

  /* Copy at most BASE_SIZE characters and terminate explicitly. */
  n = 0;
  while (n < BASE_SIZE && start + n < end) {
    base_name[n] = start[n];
    n++;
  }
  base_name[n] = 0;
}
usage()
{
/* Print the one-line synopsis on stderr and exit with status 1. */
  fputs("Usage: gather [-b bytes] [-s src_dir] [-d dst_dir] [-f file]\n", stderr);
  exit(1);
}